import pandas as pd
import matplotlib.pyplot as plt
# Read the pre-filtered Spotify export from the local Downloads folder.
filtered_data = pd.read_excel(
    "C:\\Users\\quent\\Downloads\\Most_Streamed_Spotify_Songs_2024_filtered.xlsx"
)

# Rank every song by Track Score (highest first) and keep the first 100 rows.
top_100_data = filtered_data.sort_values(by='Track Score', ascending=False)
top_100_data = top_100_data.head(100)

# Scatter plot: Track Score on the x-axis, TikTok Posts on the y-axis.
score_fig, score_ax = plt.subplots(figsize=(10, 6))
score_ax.scatter(
    top_100_data['Track Score'],
    top_100_data['TikTok Posts'],
    alpha=0.7,  # semi-transparent markers so overlapping points stay visible
)
score_ax.set_title('Top 100 Songs: TikTok Posts vs. Spotify Track Score')
score_ax.set_xlabel('Spotify Track Score')
score_ax.set_ylabel('TikTok Posts')
score_ax.grid(True)

# Render the figure.
plt.show()
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Load the filtered data from the local Excel export.
filtered_data = pd.read_excel(
    "C:\\Users\\quent\\Downloads\\Most_Streamed_Spotify_Songs_2024_filtered.xlsx"
)

# Sort by Track Score, keep the top 100 songs, and add a rescaled TikTok
# column in a single chain.
#
# .assign() returns a new DataFrame, so the derived column is never written
# into the slice produced by .head(); assigning into that slice directly can
# raise SettingWithCopyWarning on pandas versions without copy-on-write.
#
# Dividing by 1_000_000 expresses the value in millions.
# NOTE(review): this assumes 'TikTok Posts' holds raw post counts - confirm;
# if the column were already in thousands the divisor would have to be 1_000.
top_100_data = (
    filtered_data
    .sort_values(by='Track Score', ascending=False)
    .head(100)
    .assign(**{
        'TikTok Posts (in Millions)':
            lambda frame: frame['TikTok Posts'] / 1_000_000,
    })
)

# Create a scatter plot using seaborn.
plt.figure(figsize=(10, 6))
sns.scatterplot(x='Track Score', y='TikTok Posts (in Millions)', data=top_100_data)
plt.title('Top 100 Songs: TikTok Posts (in Millions) vs. Spotify Track Score')
plt.xlabel('Spotify Track Score')
plt.ylabel('TikTok Posts (in Millions)')
plt.grid(True)

# Show the plot.
plt.show()
import pandas as pd
import plotly.express as px
# Load the filtered data from the local Excel export.
filtered_data = pd.read_excel(
    "C:\\Users\\quent\\Downloads\\Most_Streamed_Spotify_Songs_2024_filtered.xlsx"
)

# Sort by Track Score, keep the top 100 songs, and add a rescaled TikTok
# column in a single chain.
#
# .assign() returns a new DataFrame, so the derived column is never written
# into the slice produced by .head(); assigning into that slice directly can
# raise SettingWithCopyWarning on pandas versions without copy-on-write.
#
# Dividing by 1_000_000 expresses the value in millions.
# NOTE(review): this assumes 'TikTok Posts' holds raw post counts - confirm;
# if the column were already in thousands the divisor would have to be 1_000.
top_100_data = (
    filtered_data
    .sort_values(by='Track Score', ascending=False)
    .head(100)
    .assign(**{
        'TikTok Posts (in Millions)':
            lambda frame: frame['TikTok Posts'] / 1_000_000,
    })
)

# Create an interactive scatter plot with a regression line using Plotly.
fig = px.scatter(
    top_100_data,
    x='Track Score',
    y='TikTok Posts (in Millions)',
    hover_data=['Track', 'Artist'],
    trendline="ols",  # Add an ordinary-least-squares regression line
    title='Top 100 Songs: TikTok Posts (in Millions) vs. Spotify Track Score',
    labels={
        'Track Score': 'Spotify Track Score',
        'TikTok Posts (in Millions)': 'TikTok Posts (in Millions)'
    },
    template='plotly',  # Use a template for consistent styling
)

# Style the two traces separately instead of pushing both settings onto every
# trace: larger dots for the scatter points, a dashed line for the trendline.
fig.update_traces(marker=dict(size=12), selector=dict(mode="markers"))
fig.update_traces(line=dict(dash="dash"), selector=dict(mode="lines"))

# Save the plot as an HTML file; the path is defined once so that the file
# written and the file opened below cannot drift apart.
output_path = "C:\\Users\\quent\\Desktop\\interactive_Spotify_TikTok_plot.html"
fig.write_html(output_path)

# Optional: open the HTML file in a web browser automatically.
import webbrowser
webbrowser.open(output_path)
# Cell output: True
import plotly.express as px
import pandas as pd
# Read the filtered Spotify export into a DataFrame.
file_path = "C:\\Users\\quent\\Downloads\\Most_Streamed_Spotify_Songs_2024_filtered.xlsx"
data = pd.read_excel(file_path)

# Sum TikTok Posts per artist, order the totals from largest to smallest,
# and keep the first ten artists.
tiktok_posts_per_artist = data.groupby('Artist')['TikTok Posts'].sum()
top_artists_by_tiktok = tiktok_posts_per_artist.sort_values(ascending=False)
top_artists_by_tiktok = top_artists_by_tiktok.head(10)
import plotly.express as px
# Bar chart of the ten artists with the most TikTok Posts.
# reset_index() turns the 'Artist' index of the aggregated Series back into a
# regular column so it can be referenced by name for the x-axis.
tiktok_totals = top_artists_by_tiktok.reset_index()
fig_tiktok = px.bar(
    data_frame=tiktok_totals,
    y='TikTok Posts',
    x='Artist',
    labels={'TikTok Posts': 'TikTok Posts'},
    title='Top 10 Artists by TikTok Posts',
)

# Display the chart.
fig_tiktok.show()
# Sum Spotify Playlist Count per artist, order the totals from largest to
# smallest, and keep the first ten artists.
playlist_count_per_artist = data.groupby('Artist')['Spotify Playlist Count'].sum()
top_artists_by_spotify = playlist_count_per_artist.sort_values(ascending=False)
top_artists_by_spotify = top_artists_by_spotify.head(10)

# Bar chart of the ten artists with the highest Spotify Playlist Count.
# reset_index() turns the 'Artist' index back into a regular column so it can
# be referenced by name for the x-axis.
spotify_totals = top_artists_by_spotify.reset_index()
fig_spotify = px.bar(
    data_frame=spotify_totals,
    y='Spotify Playlist Count',
    x='Artist',
    labels={'Spotify Playlist Count': 'Spotify Playlist Count'},
    title='Top 10 Artists by Spotify Playlist Count',
)

# Display the chart.
fig_spotify.show()
# Export both bar charts as standalone HTML files in the Downloads folder.
for bar_chart, html_target in (
    (fig_tiktok, 'C:\\Users\\quent\\Downloads\\top_10_artists_by_tiktok.html'),
    (fig_spotify, 'C:\\Users\\quent\\Downloads\\top_10_artists_by_spotify.html'),
):
    bar_chart.write_html(html_target)
# Distribution of Track Score, one marker per song.
#
# A swarm plot has to give every marker its own non-overlapping position and
# does not scale to this many observations: seaborn reported that a large
# share of the points could not be placed. A jittered strip plot shows the
# same one-dimensional distribution without dropping points into the gutters;
# transparency keeps dense regions readable.
plt.figure(figsize=(10, 6))
sns.stripplot(x=data['Track Score'], size=3, jitter=0.35, alpha=0.5)
plt.title('Strip Plot of Track Scores')  # title follows the plot type used
plt.xlabel('Track Score')
plt.show()
# Cell output (two seaborn UserWarnings):
# C:\Users\quent\anaconda3\Lib\site-packages\seaborn\categorical.py:3544: UserWarning: 6.7% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.
# C:\Users\quent\anaconda3\Lib\site-packages\seaborn\categorical.py:3544: UserWarning: 65.7% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.
import ipywidgets as widgets
from IPython.display import display
# Visualization types offered to the user; the first entry is preselected.
plot_type_choices = ['Histogram', 'Box Plot', 'Violin Plot']

# Dropdown widget used to pick which visualization to draw.
dropdown = widgets.Dropdown(
    description='Plot Type:',
    options=plot_type_choices,
    value=plot_type_choices[0],
)
# Dedicated output area for the plot. Drawing inside it lets every new
# selection replace the previous figure instead of depending on where the
# notebook frontend routes output produced by a widget callback.
plot_output = widgets.Output()

# Maps each dropdown label to the seaborn call that draws 'Track Score'.
_TRACK_SCORE_PLOTTERS = {
    'Histogram': lambda: sns.histplot(data['Track Score'], bins=20),
    'Box Plot': lambda: sns.boxplot(x=data['Track Score']),
    'Violin Plot': lambda: sns.violinplot(x=data['Track Score']),
}


def update_plot(plot_type):
    """Redraw the Track Score plot for the selected visualization type.

    Args:
        plot_type: One of 'Histogram', 'Box Plot' or 'Violin Plot'. Any other
            value draws nothing, as in the original if/elif chain.
    """
    draw = _TRACK_SCORE_PLOTTERS.get(plot_type)
    # wait=True defers the clear until new output arrives, avoiding flicker.
    plot_output.clear_output(wait=True)
    with plot_output:
        if draw is not None:
            draw()
        plt.show()


# Observe dropdown changes and update the plot with the newly selected value.
dropdown.observe(lambda change: update_plot(change['new']), names='value')

# Show the selector together with the area the plot is rendered into, then
# draw the initial plot for the default selection.
display(dropdown, plot_output)
update_plot(dropdown.value)
# Cell output: Dropdown(description='Plot Type:', options=('Histogram', 'Box Plot', 'Violin Plot'), value='Histogram')
import seaborn as sns
import matplotlib.pyplot as plt
from jupyter_dash import JupyterDash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import pandas as pd
# Load the filtered Spotify dataset.
file_path = "C:\\Users\\quent\\Downloads\\Most_Streamed_Spotify_Songs_2024_filtered.xlsx"
data = pd.read_excel(file_path)

# JupyterDash is deprecated (Dash itself emits a warning telling users to
# switch to Dash); the plain Dash class is used here instead and the notebook
# display mode is selected when the server is started below.
from dash import Dash, no_update

# Initialize the app.
app = Dash(__name__)

# Define the app layout: a plot-type selector above the graph it controls.
app.layout = html.Div([
    dcc.Dropdown(
        id='plot-type',
        options=[
            {'label': 'Histogram of Track Scores', 'value': 'hist'},
            {'label': 'Box Plot of Track Scores', 'value': 'box'},
            {'label': 'Violin Plot of Track Scores', 'value': 'violin'}
        ],
        value='hist',
        # A plot type must always be selected; without this the user can clear
        # the dropdown and the callback receives None.
        clearable=False,
    ),
    dcc.Graph(id='graph')
])

# Maps each dropdown value to the Plotly Express function that draws it.
_FIGURE_BUILDERS = {
    'hist': px.histogram,
    'box': px.box,
    'violin': px.violin,
}


# Define the callback to update the graph.
@app.callback(
    Output('graph', 'figure'),
    [Input('plot-type', 'value')]
)
def update_figure(plot_type):
    """Build the Track Score figure for the selected plot type.

    Args:
        plot_type: Dropdown value: 'hist', 'box' or 'violin'.

    Returns:
        The Plotly figure for the selection, or ``no_update`` when the value
        is not recognized (for example None). The original if/elif chain left
        ``fig`` unassigned in that case and raised UnboundLocalError.
    """
    build = _FIGURE_BUILDERS.get(plot_type)
    if build is None:
        return no_update
    return build(data, x='Track Score')


# Run the app and open it in an external browser tab.
app.run(jupyter_mode='external')
# Cell output:
# Dash app running on http://127.0.0.1:8050/
# C:\Users\quent\anaconda3\Lib\site-packages\dash\dash.py:556: UserWarning: JupyterDash is deprecated, use Dash instead. See https://dash.plotly.com/dash-in-jupyter for more details.